# Replication Materials for
# "Non-ignorable Attrition in Pairwise Randomized Experiments"
# by Kentaro Fukumoto
# in Political Analysis, forthcoming
# October 21, 2020
#
# This R script replicates Table 1 and Figures 1 through 3 in the Supplementary Material.

data <- read.csv("data.csv")
attach(data)

X <- data[, 3]
treat <- which(X == 1)
control <- which(X == 0)

my.var <- function(x){
  sum((x - mean(x))^2)/length(x)
}
# var(): The denominator n - 1 is used which gives an unbiased estimator of the (co)variance for i.i.d. observations.
my.cov <- function(x, y){
  sum((x - mean(x)) * (y - mean(y)))/length(x)
}

out.U <- out.P <- matrix(NA, 4, 4)
Nt <- Nc <- Np <- array()

for(v in 1:3){ # Years 1999-2001
  Y <- data[, (v + 10)] * 100
  Y.control.paired <- Y.control.obs <- Y[control]
  Y.treat.obs <- Y[treat]
  Y.treat.paired <- Y[treat[-6]] # Pair 6 deleted
  dif.Y <- Y.treat.paired - Y.control.paired
  Nt[v] <- length(Y.treat.obs)
  Nc[v] <- length(Y.control.obs)
  Np[v] <- length(dif.Y)
  
  # unitwise deletion
  out.U[v, 1] <- mean(Y.treat.obs) - mean(Y.control.obs) # tau.U
  out.U[v, 2] <- ((1/(Nt[v] - 1)) * my.var(Y.treat.obs)) + ((1/(Nc[v] - 1)) * my.var(Y.control.obs)) # Neyman variance estimator
  out.U[v, 3] <-  out.U[v, 2] - (2 * (Np[v]/(Nt[v] * Nc[v])) * (Np[v]/(Np[v] - 1)) * my.cov(Y.treat.paired, Y.control.paired)) # Adjusted Neyman variance estimator
  
  #pairwise deletion
  out.P[v, 1] <- mean(Y.treat.paired) - mean(Y.control.paired) # tau.P
  out.P[v, 2] <- ((1/(Np[v] - 1)) * my.var(Y.treat.paired)) + ((1/(Np[v] - 1)) * my.var(Y.control.paired))  # Neyman variance estimator
  out.P[v, 3] <-  out.P[v, 2] - ((2/(Np[v] - 1)) * my.cov(Y.treat.paired, Y.control.paired)) # Adjusted Neyman variance estimator  
  out.P[v, 4] <- my.var(dif.Y)/(Np[v] - 1) # pairwise variance estimator
}

v <- 4 # Year 2002
Y <- data[, (v + 10)] * 100
Y.control.paired <- Y.control.obs <- Y[control[-16]] # Pair 17 deleted
Y.treat.obs <- Y[treat]
Y.treat.paired <- Y[treat[-c(6, 17)]] # Pairs 6,  17 deleted
dif.Y <- Y.treat.paired - Y.control.paired
Nt[v] <- length(Y.treat.obs)
Nc[v] <- length(Y.control.obs)
Np[v] <- length(dif.Y)

#unitwise deletion
out.U[v, 1] <- mean(Y.treat.obs) - mean(Y.control.obs) # tau.U
out.U[v, 2] <- ((1/(Nt[v] - 1)) * my.var(Y.treat.obs)) + ((1/(Nc[v] - 1)) * my.var(Y.control.obs)) # Neyman variance estimator
out.U[v, 3] <-  out.U[v, 2] - (2 * (Np[v]/(Nt[v] * Nc[v])) * (Np[v]/(Np[v] - 1)) * my.cov(Y.treat.paired, Y.control.paired)) # Adjusted Neyman variance estimator

#pairwise deletion
out.P[v, 1] <- mean(Y.treat.paired) - mean(Y.control.paired) # tau.P
out.P[v, 2] <- ((1/(Np[v] - 1)) * my.var(Y.treat.paired)) + ((1/(Np[v] - 1)) * my.var(Y.control.paired)) # Neyman variance estimator
out.P[v, 3] <-  out.P[v, 2] - ((2/(Np[v] - 1)) * my.cov(Y.treat.paired, Y.control.paired)) # Adjusted Neyman variance estimator  
out.P[v, 4] <- my.var(dif.Y)/(Np[v] - 1) # pairwise variance estimator

out.U[, c(2:4)] <- sqrt(out.U[, c(2:4)] )
out.P[, c(2:4)] <- sqrt(out.P[, c(2:4)] )

colnames(out.U) <- colnames(out.P) <- c("tau", "Neyman", "Adj", "pair")
rownames(out.U) <- rownames(out.P) <- c("Matched-on (1999)", "Pretreatment (2000)", "Posttreatment (2001)", "Placebo (2002)")

#########
# Table 1

(out.U2 <- round(out.U[, c(1:3)], 2))
(out.P2 <- round(out.P[, c(1:3)], 2))
table1 <- matrix(NA, 16, 2)

for(i in 1:4){
  for(j in 1:3){
    table1[(i * 3) + j - 3, 1] <- out.P2[i, j]    
    table1[(i * 3) + j - 3, 2] <- out.U2[i, j]
  }
}
table1[c(15, 16), 1] <- Np[c(3:4)]
table1[c(13:16), 2] <- c(Nt[c(3:4)], Nc[c(3:4)])

colnames(table1) <- c("Unitwise", "Pairwise")
rownames(table1) <- c("Matched-on (1999)", " ", " ",
                      "Pretreatment (2000)", " ", " ",
                      "Posttreatment (2001)", " ", " ",
                      "Placebo (2002)", " ", " ",
                      "N^T_U (1999-2001)", "N^T_U (2002)",
                      "N^C_U, N_P (1999-2001)", "N^C_U, N_P (2002)")

table1
write.csv(table1, "table1.csv", row.names = F)

#########
# Figures

out.sum <- rbind(out.U2[1, ], out.P2[1, ])
b <- out.sum[, 1]
lab <- c("UDE", "PDE")

figure <- function(critical.value){
  bound.Neyman.LB <- out.sum[, 1] - (out.sum[, 2] * critical.value)
  bound.Neyman.UB <- out.sum[, 1] + (out.sum[, 2] * critical.value)
  bound.Adj.LB <- out.sum[, 1] - (out.sum[, 3] * critical.value)
  bound.Adj.UB <- out.sum[, 1] + (out.sum[, 3] * critical.value)
  plot(NULL, 
       xlim = c(-0.8, 0.2), 
       ylim = c(0, 2.5), 
       axes = T, 
       yaxt = "n", 
       xlab = "Treatment Effect on Bagrut Rate in 1999 (% Points)",  
       ylab = NA, 
       bty = "n"
  )
  for(i in 1:2){
    height <- i 
    points(b[i],  height,  pch=19)
    lines(x = c(bound.Adj.LB[i], bound.Adj.UB[i]),  y = rep(height,  2))
    text(min(bound.Adj.LB) - 0.1,  height + 0.25,  lab[i],  pos = 4,  cex = 0.8)
  }
  abline(v=0, lty=2)
}

# Figure 1
critical.value <- qt(0.975, 19)
pdf("figure1.pdf", width=6, height=3)
figure(critical.value)
dev.off()
  
# Figure 2
critical.value <- qt(0.975, 20)
pdf("figure2.pdf", width=6, height=3)
figure(critical.value)
dev.off()

# Figure 3
critical.value <- qnorm(0.975)
pdf("figure3.pdf", width=6, height=3)
figure(critical.value)
dev.off()